#install.packages("stats19")
#install.packages("sugrrants")
#install.packages("tidyverse")
#install.packages("sf")
#install.packages("lubridate")
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(stats19)
## Data provided under OGL v3.0. Cite the source and link to:
## www.nationalarchives.gov.uk/doc/open-government-licence/version/3/
library(sugrrants)
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble 2.1.3 ✓ dplyr 0.8.3
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ✓ purrr 0.3.3
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks plotly::filter(), stats::filter()
## x dplyr::lag() masks stats::lag()
library(sf)
## Linking to GEOS 3.6.2, GDAL 2.2.3, PROJ 4.9.3
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
# Load the three 2018 road-safety CSV extracts: accidents (A), casualties (C)
# and vehicles (V). The paths are hard-coded to a shared directory under the
# user's home, so the script only runs where that directory exists.
# In the recorded run text columns were read as factors (see the <fct> columns
# in the glimpse() output further down).
A <- read.csv("~/SharedFiles/ST606/2020/data/UKroadsafety/dftRoadSafetyData_Accidents_2018.csv")
C <- read.csv("~/SharedFiles/ST606/2020/data/UKroadsafety/dftRoadSafetyData_Casualties_2018.csv")
V <- read.csv("~/SharedFiles/ST606/2020/data/UKroadsafety/dftRoadSafetyData_Vehicles_2018.csv")
# View(A) # Accidents
# View(C) # Casualties
# View(V) # Vehicles
# Number of accident records
nrow(A)
## [1] 122635
# Column names of the accident table: how many there are, then the full list
column_names <- colnames(A)
length(column_names)
## [1] 32
column_names
## [1] "Accident_Index"
## [2] "Location_Easting_OSGR"
## [3] "Location_Northing_OSGR"
## [4] "Longitude"
## [5] "Latitude"
## [6] "Police_Force"
## [7] "Accident_Severity"
## [8] "Number_of_Vehicles"
## [9] "Number_of_Casualties"
## [10] "Date"
## [11] "Day_of_Week"
## [12] "Time"
## [13] "Local_Authority_.District."
## [14] "Local_Authority_.Highway."
## [15] "X1st_Road_Class"
## [16] "X1st_Road_Number"
## [17] "Road_Type"
## [18] "Speed_limit"
## [19] "Junction_Detail"
## [20] "Junction_Control"
## [21] "X2nd_Road_Class"
## [22] "X2nd_Road_Number"
## [23] "Pedestrian_Crossing.Human_Control"
## [24] "Pedestrian_Crossing.Physical_Facilities"
## [25] "Light_Conditions"
## [26] "Weather_Conditions"
## [27] "Road_Surface_Conditions"
## [28] "Special_Conditions_at_Site"
## [29] "Carriageway_Hazards"
## [30] "Urban_or_Rural_Area"
## [31] "Did_Police_Officer_Attend_Scene_of_Accident"
## [32] "LSOA_of_Accident_Location"
# Number of row names in the accident table. The table has to be passed to
# rownames(); the bare function object `rownames` has length 1 whatever the
# data look like, which is what was being reported before.
length(rownames(A))
## [1] 122635
# Column-by-column overview (type and first values) of the raw accident data
glimpse(A)
## Observations: 122,635
## Variables: 32
## $ Accident_Index <fct> 2018010080971, 2018010080…
## $ Location_Easting_OSGR <int> 529150, 542020, 531720, 5…
## $ Location_Northing_OSGR <int> 182270, 184290, 182910, 1…
## $ Longitude <dbl> -0.139737, 0.046471, -0.1…
## $ Latitude <dbl> 51.52459, 51.53965, 51.52…
## $ Police_Force <int> 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Accident_Severity <int> 3, 3, 3, 2, 2, 3, 2, 3, 3…
## $ Number_of_Vehicles <int> 2, 1, 2, 2, 2, 2, 2, 3, 2…
## $ Number_of_Casualties <int> 2, 1, 1, 1, 2, 4, 1, 1, 1…
## $ Date <fct> 01/01/2018, 01/01/2018, 0…
## $ Day_of_Week <int> 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ Time <fct> 01:30, 00:50, 00:45, 03:0…
## $ Local_Authority_.District. <int> 2, 17, 3, 17, 6, 30, 12, …
## $ Local_Authority_.Highway. <fct> E09000007, E09000025, E09…
## $ X1st_Road_Class <int> 3, 4, 3, 4, 3, 3, 5, 3, 6…
## $ X1st_Road_Number <int> 501, 165, 1, 167, 207, 10…
## $ Road_Type <int> 3, 6, 6, 3, 6, 6, 6, 2, 6…
## $ Speed_limit <int> 30, 30, 20, 30, 30, 30, 3…
## $ Junction_Detail <int> 0, 2, 6, 7, 0, 0, 2, 5, 3…
## $ Junction_Control <int> -1, 4, 4, 2, -1, -1, 4, 4…
## $ X2nd_Road_Class <int> -1, 6, 5, 3, -1, -1, 5, 3…
## $ X2nd_Road_Number <int> 0, 0, 0, 124, 0, 0, 0, 20…
## $ Pedestrian_Crossing.Human_Control <int> 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Pedestrian_Crossing.Physical_Facilities <int> 0, 0, 5, 5, 0, 0, 0, 0, 8…
## $ Light_Conditions <int> 4, 4, 4, 4, 4, 4, 4, 7, 4…
## $ Weather_Conditions <int> 1, 1, 1, 2, 1, 5, 1, 1, 1…
## $ Road_Surface_Conditions <int> 1, 1, 1, 2, 2, 2, 1, 1, 1…
## $ Special_Conditions_at_Site <int> 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Carriageway_Hazards <int> 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Urban_or_Rural_Area <int> 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Did_Police_Officer_Attend_Scene_of_Accident <int> 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ LSOA_of_Accident_Location <fct> E01000854, E01003531, E01…
# format_sf() converts the STATS19 accident records into a spatial (sf) object.
# Records without coordinates are dropped (55 rows in this run).
# The resulting point geometry is in projected metres (the head() output
# further down reports CRS EPSG:27700); Longitude/Latitude stay as plain columns.
crashes_sf <- format_sf(A)
## 55 rows removed with no coordinates
glimpse(crashes_sf)
## Observations: 122,580
## Variables: 31
## $ Accident_Index <fct> 2018010080971, 2018010080…
## $ Longitude <dbl> -0.139737, 0.046471, -0.1…
## $ Latitude <dbl> 51.52459, 51.53965, 51.52…
## $ Police_Force <int> 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Accident_Severity <int> 3, 3, 3, 2, 2, 3, 2, 3, 3…
## $ Number_of_Vehicles <int> 2, 1, 2, 2, 2, 2, 2, 3, 2…
## $ Number_of_Casualties <int> 2, 1, 1, 1, 2, 4, 1, 1, 1…
## $ Date <fct> 01/01/2018, 01/01/2018, 0…
## $ Day_of_Week <int> 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ Time <fct> 01:30, 00:50, 00:45, 03:0…
## $ Local_Authority_.District. <int> 2, 17, 3, 17, 6, 30, 12, …
## $ Local_Authority_.Highway. <fct> E09000007, E09000025, E09…
## $ X1st_Road_Class <int> 3, 4, 3, 4, 3, 3, 5, 3, 6…
## $ X1st_Road_Number <int> 501, 165, 1, 167, 207, 10…
## $ Road_Type <int> 3, 6, 6, 3, 6, 6, 6, 2, 6…
## $ Speed_limit <int> 30, 30, 20, 30, 30, 30, 3…
## $ Junction_Detail <int> 0, 2, 6, 7, 0, 0, 2, 5, 3…
## $ Junction_Control <int> -1, 4, 4, 2, -1, -1, 4, 4…
## $ X2nd_Road_Class <int> -1, 6, 5, 3, -1, -1, 5, 3…
## $ X2nd_Road_Number <int> 0, 0, 0, 124, 0, 0, 0, 20…
## $ Pedestrian_Crossing.Human_Control <int> 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Pedestrian_Crossing.Physical_Facilities <int> 0, 0, 5, 5, 0, 0, 0, 0, 8…
## $ Light_Conditions <int> 4, 4, 4, 4, 4, 4, 4, 7, 4…
## $ Weather_Conditions <int> 1, 1, 1, 2, 1, 5, 1, 1, 1…
## $ Road_Surface_Conditions <int> 1, 1, 1, 2, 2, 2, 1, 1, 1…
## $ Special_Conditions_at_Site <int> 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Carriageway_Hazards <int> 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Urban_or_Rural_Area <int> 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Did_Police_Officer_Attend_Scene_of_Accident <int> 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ LSOA_of_Accident_Location <fct> E01000854, E01003531, E01…
## $ geometry <POINT [m]> POINT (529150 18227…
# Preparation for counting and plotting accidents per police force:
# look at the raw accident data once more before formatting it
glimpse(A) # accident data
## Observations: 122,635
## Variables: 32
## $ Accident_Index <fct> 2018010080971, 2018010080…
## $ Location_Easting_OSGR <int> 529150, 542020, 531720, 5…
## $ Location_Northing_OSGR <int> 182270, 184290, 182910, 1…
## $ Longitude <dbl> -0.139737, 0.046471, -0.1…
## $ Latitude <dbl> 51.52459, 51.53965, 51.52…
## $ Police_Force <int> 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Accident_Severity <int> 3, 3, 3, 2, 2, 3, 2, 3, 3…
## $ Number_of_Vehicles <int> 2, 1, 2, 2, 2, 2, 2, 3, 2…
## $ Number_of_Casualties <int> 2, 1, 1, 1, 2, 4, 1, 1, 1…
## $ Date <fct> 01/01/2018, 01/01/2018, 0…
## $ Day_of_Week <int> 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ Time <fct> 01:30, 00:50, 00:45, 03:0…
## $ Local_Authority_.District. <int> 2, 17, 3, 17, 6, 30, 12, …
## $ Local_Authority_.Highway. <fct> E09000007, E09000025, E09…
## $ X1st_Road_Class <int> 3, 4, 3, 4, 3, 3, 5, 3, 6…
## $ X1st_Road_Number <int> 501, 165, 1, 167, 207, 10…
## $ Road_Type <int> 3, 6, 6, 3, 6, 6, 6, 2, 6…
## $ Speed_limit <int> 30, 30, 20, 30, 30, 30, 3…
## $ Junction_Detail <int> 0, 2, 6, 7, 0, 0, 2, 5, 3…
## $ Junction_Control <int> -1, 4, 4, 2, -1, -1, 4, 4…
## $ X2nd_Road_Class <int> -1, 6, 5, 3, -1, -1, 5, 3…
## $ X2nd_Road_Number <int> 0, 0, 0, 124, 0, 0, 0, 20…
## $ Pedestrian_Crossing.Human_Control <int> 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Pedestrian_Crossing.Physical_Facilities <int> 0, 0, 5, 5, 0, 0, 0, 0, 8…
## $ Light_Conditions <int> 4, 4, 4, 4, 4, 4, 4, 7, 4…
## $ Weather_Conditions <int> 1, 1, 1, 2, 1, 5, 1, 1, 1…
## $ Road_Surface_Conditions <int> 1, 1, 1, 2, 2, 2, 1, 1, 1…
## $ Special_Conditions_at_Site <int> 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Carriageway_Hazards <int> 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Urban_or_Rural_Area <int> 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Did_Police_Officer_Attend_Scene_of_Accident <int> 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ LSOA_of_Accident_Location <fct> E01000854, E01003531, E01…
# format_accidents() (stats19) formats the accident records. As the glimpse
# below shows, column names become lower case, coded fields such as
# police_force and accident_severity become text labels, `date` becomes a Date
# and a `datetime` column is created from the date and time columns.
A_for<-format_accidents(crashes_sf)
## date and time columns present, creating formatted datetime column
glimpse(A_for) # check the result of the formatting
## Observations: 122,580
## Variables: 32
## $ accident_index <fct> 2018010080971, 2018010080…
## $ longitude <dbl> -0.139737, 0.046471, -0.1…
## $ latitude <dbl> 51.52459, 51.53965, 51.52…
## $ police_force <chr> "Metropolitan Police", "M…
## $ accident_severity <chr> "Slight", "Slight", "Slig…
## $ number_of_vehicles <int> 2, 1, 2, 2, 2, 2, 2, 3, 2…
## $ number_of_casualties <int> 2, 1, 1, 1, 2, 4, 1, 1, 1…
## $ date <date> 2018-01-01, 2018-01-01, …
## $ day_of_week <chr> "Monday", "Monday", "Mond…
## $ time <fct> 01:30, 00:50, 00:45, 03:0…
## $ local_authority_.district. <int> 2, 17, 3, 17, 6, 30, 12, …
## $ local_authority_.highway. <fct> E09000007, E09000025, E09…
## $ xfirst_road_class <int> 3, 4, 3, 4, 3, 3, 5, 3, 6…
## $ xfirst_road_number <int> 501, 165, 1, 167, 207, 10…
## $ road_type <chr> "Dual carriageway", "Sing…
## $ speed_limit <int> 30, 30, 20, 30, 30, 30, 3…
## $ junction_detail <chr> "Not at junction or withi…
## $ junction_control <chr> "Data missing or out of r…
## $ xsecond_road_class <int> -1, 6, 5, 3, -1, -1, 5, 3…
## $ xsecond_road_number <int> 0, 0, 0, 124, 0, 0, 0, 20…
## $ pedestrian_crossing.human_control <int> 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ pedestrian_crossing.physical_facilities <int> 0, 0, 5, 5, 0, 0, 0, 0, 8…
## $ light_conditions <chr> "Darkness - lights lit", …
## $ weather_conditions <chr> "Fine no high winds", "Fi…
## $ road_surface_conditions <chr> "Dry", "Dry", "Dry", "Wet…
## $ special_conditions_at_site <chr> "None", "None", "None", "…
## $ carriageway_hazards <chr> "None", "None", "None", "…
## $ urban_or_rural_area <chr> "Urban", "Urban", "Urban"…
## $ did_police_officer_attend_scene_of_accident <int> 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ lsoa_of_accident_location <fct> E01000854, E01003531, E01…
## $ geometry <POINT [m]> POINT (529150 18227…
## $ datetime <dttm> 2018-01-01 01:30:00, 201…
# Distinct police force names present in the formatted accident data
unique(A_for$police_force)
## [1] "Metropolitan Police" "City of London" "Cumbria"
## [4] "Lancashire" "Merseyside" "Greater Manchester"
## [7] "Cheshire" "Northumbria" "Durham"
## [10] "North Yorkshire" "West Yorkshire" "South Yorkshire"
## [13] "Humberside" "Cleveland" "West Midlands"
## [16] "Staffordshire" "West Mercia" "Warwickshire"
## [19] "Derbyshire" "Nottinghamshire" "Lincolnshire"
## [22] "Leicestershire" "Northamptonshire" "Cambridgeshire"
## [25] "Norfolk" "Suffolk" "Bedfordshire"
## [28] "Hertfordshire" "Essex" "Thames Valley"
## [31] "Hampshire" "Surrey" "Kent"
## [34] "Sussex" "Devon and Cornwall" "Avon and Somerset"
## [37] "Gloucestershire" "Wiltshire" "Dorset"
## [40] "North Wales" "Gwent" "South Wales"
## [43] "Dyfed-Powys" "Northern" "Grampian"
## [46] "Tayside" "Fife" "Lothian and Borders"
## [49] "Central" "Strathclyde" "Dumfries and Galloway"
#view(A_for)
# Counting and plotting the number of accidents per police force.
# Request a 1 x 3 grid on the base-graphics device for the three maps below.
# NOTE(review): par() is never restored, so later base plots drawn on the same
# device inherit this layout; also confirm that plot() on sf objects honours
# mfrow in this setup.
par(mfrow=c(1,3))
glimpse(crashes_sf)
## Observations: 122,580
## Variables: 31
## $ Accident_Index <fct> 2018010080971, 2018010080…
## $ Longitude <dbl> -0.139737, 0.046471, -0.1…
## $ Latitude <dbl> 51.52459, 51.53965, 51.52…
## $ Police_Force <int> 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Accident_Severity <int> 3, 3, 3, 2, 2, 3, 2, 3, 3…
## $ Number_of_Vehicles <int> 2, 1, 2, 2, 2, 2, 2, 3, 2…
## $ Number_of_Casualties <int> 2, 1, 1, 1, 2, 4, 1, 1, 1…
## $ Date <fct> 01/01/2018, 01/01/2018, 0…
## $ Day_of_Week <int> 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ Time <fct> 01:30, 00:50, 00:45, 03:0…
## $ Local_Authority_.District. <int> 2, 17, 3, 17, 6, 30, 12, …
## $ Local_Authority_.Highway. <fct> E09000007, E09000025, E09…
## $ X1st_Road_Class <int> 3, 4, 3, 4, 3, 3, 5, 3, 6…
## $ X1st_Road_Number <int> 501, 165, 1, 167, 207, 10…
## $ Road_Type <int> 3, 6, 6, 3, 6, 6, 6, 2, 6…
## $ Speed_limit <int> 30, 30, 20, 30, 30, 30, 3…
## $ Junction_Detail <int> 0, 2, 6, 7, 0, 0, 2, 5, 3…
## $ Junction_Control <int> -1, 4, 4, 2, -1, -1, 4, 4…
## $ X2nd_Road_Class <int> -1, 6, 5, 3, -1, -1, 5, 3…
## $ X2nd_Road_Number <int> 0, 0, 0, 124, 0, 0, 0, 20…
## $ Pedestrian_Crossing.Human_Control <int> 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Pedestrian_Crossing.Physical_Facilities <int> 0, 0, 5, 5, 0, 0, 0, 0, 8…
## $ Light_Conditions <int> 4, 4, 4, 4, 4, 4, 4, 7, 4…
## $ Weather_Conditions <int> 1, 1, 1, 2, 1, 5, 1, 1, 1…
## $ Road_Surface_Conditions <int> 1, 1, 1, 2, 2, 2, 1, 1, 1…
## $ Special_Conditions_at_Site <int> 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Carriageway_Hazards <int> 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Urban_or_Rural_Area <int> 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Did_Police_Officer_Attend_Scene_of_Accident <int> 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ LSOA_of_Accident_Location <fct> E01000854, E01003531, E01…
## $ geometry <POINT [m]> POINT (529150 18227…
# police_boundaries holds the police force areas (presumably the dataset that
# comes with stats19 - it is not defined in this script); the boundary data
# were taken from the UK government's official geographic data portal.
# Each map keeps the accidents of one severity class, counts how many fall
# inside every police force area and plots the result.

# Map 1: slight accidents
A_for %>%
  dplyr::filter(accident_severity == "Slight") %>%
  dplyr::select(Slight_Injuries = accident_index) %>%
  aggregate(by = police_boundaries, FUN = length) %>%
  plot()

# Map 2: serious accidents
A_for %>%
  dplyr::filter(accident_severity == "Serious") %>%
  dplyr::select(num_of_serious_cases = accident_index) %>%
  aggregate(by = police_boundaries, FUN = length) %>%
  plot()

# Map 3: fatal accidents
A_for %>%
  dplyr::filter(accident_severity == "Fatal") %>%
  dplyr::select(Number_of_Fatalities = accident_index) %>%
  aggregate(by = police_boundaries, FUN = length) %>%
  plot()
# Column names of the spatial accident table
names(crashes_sf)
## [1] "Accident_Index"
## [2] "Longitude"
## [3] "Latitude"
## [4] "Police_Force"
## [5] "Accident_Severity"
## [6] "Number_of_Vehicles"
## [7] "Number_of_Casualties"
## [8] "Date"
## [9] "Day_of_Week"
## [10] "Time"
## [11] "Local_Authority_.District."
## [12] "Local_Authority_.Highway."
## [13] "X1st_Road_Class"
## [14] "X1st_Road_Number"
## [15] "Road_Type"
## [16] "Speed_limit"
## [17] "Junction_Detail"
## [18] "Junction_Control"
## [19] "X2nd_Road_Class"
## [20] "X2nd_Road_Number"
## [21] "Pedestrian_Crossing.Human_Control"
## [22] "Pedestrian_Crossing.Physical_Facilities"
## [23] "Light_Conditions"
## [24] "Weather_Conditions"
## [25] "Road_Surface_Conditions"
## [26] "Special_Conditions_at_Site"
## [27] "Carriageway_Hazards"
## [28] "Urban_or_Rural_Area"
## [29] "Did_Police_Officer_Attend_Scene_of_Accident"
## [30] "LSOA_of_Accident_Location"
## [31] "geometry"
#glimpse(crashes_sf)
# First rows of the spatial accident table, including its geometry header
head(crashes_sf)
## Simple feature collection with 6 features and 30 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 526060 ymin: 176500 xmax: 543580 ymax: 194910
## CRS: EPSG:27700
## Accident_Index Longitude Latitude Police_Force Accident_Severity
## 1 2018010080971 -0.139737 51.52459 1 3
## 2 2018010080973 0.046471 51.53965 1 3
## 3 2018010080974 -0.102474 51.52975 1 3
## 4 2018010080981 0.037828 51.53018 1 2
## 5 2018010080982 0.065781 51.46926 1 2
## 6 2018010080983 -0.179720 51.63888 1 3
## Number_of_Vehicles Number_of_Casualties Date Day_of_Week Time
## 1 2 2 01/01/2018 2 01:30
## 2 1 1 01/01/2018 2 00:50
## 3 2 1 01/01/2018 2 00:45
## 4 2 1 01/01/2018 2 03:00
## 5 2 2 01/01/2018 2 02:20
## 6 2 4 01/01/2018 2 01:52
## Local_Authority_.District. Local_Authority_.Highway. X1st_Road_Class
## 1 2 E09000007 3
## 2 17 E09000025 4
## 3 3 E09000019 3
## 4 17 E09000025 4
## 5 6 E09000011 3
## 6 30 E09000003 3
## X1st_Road_Number Road_Type Speed_limit Junction_Detail Junction_Control
## 1 501 3 30 0 -1
## 2 165 6 30 2 4
## 3 1 6 20 6 4
## 4 167 3 30 7 2
## 5 207 6 30 0 -1
## 6 1000 6 30 0 -1
## X2nd_Road_Class X2nd_Road_Number Pedestrian_Crossing.Human_Control
## 1 -1 0 0
## 2 6 0 0
## 3 5 0 0
## 4 3 124 0
## 5 -1 0 0
## 6 -1 0 0
## Pedestrian_Crossing.Physical_Facilities Light_Conditions Weather_Conditions
## 1 0 4 1
## 2 0 4 1
## 3 5 4 1
## 4 5 4 2
## 5 0 4 1
## 6 0 4 5
## Road_Surface_Conditions Special_Conditions_at_Site Carriageway_Hazards
## 1 1 0 0
## 2 1 0 0
## 3 1 0 0
## 4 2 0 0
## 5 2 0 0
## 6 2 0 0
## Urban_or_Rural_Area Did_Police_Officer_Attend_Scene_of_Accident
## 1 1 1
## 2 1 1
## 3 1 1
## 4 1 1
## 5 1 1
## 6 1 1
## LSOA_of_Accident_Location geometry
## 1 E01000854 POINT (529150 182270)
## 2 E01003531 POINT (542020 184290)
## 3 E01002723 POINT (531720 182910)
## 4 E01003492 POINT (541450 183220)
## 5 E01001682 POINT (543580 176500)
## 6 E01000271 POINT (526060 194910)
# Accident counts per weekday, sorted from the quietest to the busiest day
crash_table <- sort(table(A_for$day_of_week))
# Bar chart of those counts; the right-most bar is the weekday with the most
# accidents
barplot(
  height = crash_table,
  las = 2,
  density = 10,
  col = "blue",
  border = "red",
  ylab = "Count",
  main = "Count of Crashes on Days"
)
#glimpse(A_for)
# Accidents per weekday together with each day's share of all accidents.
# day_of_week is a character column, so grouping on it sorts the days
# alphabetically (Friday, Monday, Saturday, ...). Labelling the summarised rows
# by position with a Sunday..Saturday vector therefore attached the wrong day
# name to every count except Monday and Thursday. Converting the column to a
# factor with calendar-ordered levels before grouping gives rows in
# Sunday..Saturday order with each count under its own day.
A_for %>%
  st_set_geometry(NULL) %>%
  mutate(
    day_of_week = factor(
      day_of_week,
      levels = c("Sunday", "Monday", "Tuesday", "Wednesday",
                 "Thursday", "Friday", "Saturday")
    )
  ) %>%
  group_by(day_of_week) %>%
  summarize(num_accs = n()) %>%
  mutate(prop = paste(round(100 * num_accs / sum(num_accs), 2), "%"))
## # A tibble: 7 x 3
## day_of_week num_accs prop
## <fct> <int> <chr>
## 1 Sunday 13796 11.25 %
## 2 Monday 17732 14.47 %
## 3 Tuesday 17943 14.64 %
## 4 Wednesday 18391 15 %
## 5 Thursday 18647 15.21 %
## 6 Friday 20010 16.32 %
## 7 Saturday 16061 13.1 %
# Drivers involved in road accidents, split by sex, with each group's share.
# The numeric sex codes are translated through an explicit code -> label
# lookup instead of by row position, so a label cannot end up on the wrong
# count if a code is absent from the data or a new one appears (codes without
# an entry are shown as the code itself).
# Code meanings follow the STATS19 variable lookup (-1 missing, 1 male,
# 2 female, 3 not known) - the same assignment the positional labels implied.
V %>%
  group_by(Sex_of_Driver) %>%
  summarize(num_accs = n()) %>%
  mutate(
    Sex_of_Driver = recode(
      as.character(Sex_of_Driver),
      `-1` = "Data Missing",
      `1` = "Male",
      `2` = "Female",
      `3` = "Unknown"
    )
  ) %>%
  mutate(prop = paste(round(100 * num_accs / sum(num_accs), 2), "%"))
## # A tibble: 4 x 3
## Sex_of_Driver num_accs prop
## <chr> <int> <chr>
## 1 Data Missing 2 0 %
## 2 Male 143361 63.32 %
## 3 Female 61233 27.05 %
## 4 Unknown 21813 9.63 %
# Interpretation note, printed to the console
paste("The data suggests that women are less likely to be drivers in a road accident. I think of two possible explanations for this: women are better drivers or there are significantly less female drivers on the road.")
## [1] "The data suggests that women are less likely to be drivers in a road accident. I think of two possible explanations for this: women are better drivers or there are significantly less female drivers on the road."
# Number of accidents per hour of day for each weekday, plus each hour's share
# (in %) of that weekday's accidents.
# Records without a recorded time give an empty hour string. They are removed
# BEFORE counting, so the percentages are taken over exactly the rows that
# remain in the table (previously the dropped rows still inflated the
# denominators). The former arrange() step is gone because summarize() orders
# its output by the grouping keys anyway.
# The result stays grouped by `day`, as before.
hourly_data <- A_for %>%
  st_set_geometry(NULL) %>%
  mutate(day = weekdays(date), hour = substring(time, 1, 2)) %>%
  filter(hour != "") %>%
  group_by(day, hour) %>%
  summarize(num_accs = n()) %>%
  mutate(prop = round(100 * num_accs / sum(num_accs), 1))
hourly_data
## # A tibble: 168 x 4
## # Groups: day [7]
## day hour num_accs prop
## <chr> <chr> <int> <dbl>
## 1 Friday 00 244 1.2
## 2 Friday 01 148 0.7
## 3 Friday 02 114 0.6
## 4 Friday 03 80 0.4
## 5 Friday 04 83 0.4
## 6 Friday 05 179 0.9
## 7 Friday 06 406 2
## 8 Friday 07 940 4.7
## 9 Friday 08 1471 7.4
## 10 Friday 09 894 4.5
## # … with 158 more rows
# Sanity check: accidents covered by the hourly table ...
sum(hourly_data$num_accs)
## [1] 122567
# ... against all formatted accident records. The difference of 13 matches the
# rows removed by the filter on `hour`.
nrow(A_for)
## [1] 122580
#format_axis is a function to format axis below
# plot_ly(hourly_data %>%
# group_by(day, hour) %>% summarize(tot=sum(num_accs)) %>%
# mutate(prop=round(100*tot/sum(tot), 1)),
# x=~hour,y=~prop, color =~day, type = "scatter", mode = "lines") %>%
# add_trace(data=hourly_data %>% group_by(hour) %>%
# summarize(tot=sum(num_accs)) %>%
# mutate(prop=round(100*tot/sum(tot), 1)),
# x=~hour,y=~prop,name="All Days",
# line=list(width=3))
# Total number of casualties per weekday, drawn as bars ordered from the
# highest to the lowest total (which weekdays see the most casualties?)
sum_casual <- A_for %>%
  st_set_geometry(NULL) %>%
  group_by(day_of_week) %>%
  summarise(Casualities = sum(number_of_casualties))
ggplot(
  data = sum_casual,
  mapping = aes(
    x = reorder(day_of_week, -Casualities),
    y = Casualities,
    fill = day_of_week
  )
) +
  geom_col() +
  # print each total just above its bar
  geom_text(
    aes(label = Casualities),
    position = position_dodge(width = 0.9),
    hjust = .5,
    vjust = -0.35
  ) +
  labs(
    title = "Number of casulaities on days in descending order",
    x = "Days",
    y = "number of casualities"
  ) +
  theme_classic()
#analysis can be written on the basis of above 2 barplots
# format_vehicles() (stats19) formats the vehicle records; in the glimpse below
# column names are lower case and several coded fields (e.g. vehicle_type,
# sex_of_driver) appear as text labels.
V_for<-format_vehicles(V)
glimpse(V_for)
## Observations: 226,409
## Variables: 23
## $ accident_index <fct> 2018010080971, 2018010080971, 201801…
## $ vehicle_reference <int> 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, …
## $ vehicle_type <chr> "Car", "Taxi/Private hire car", "Car…
## $ towing_and_articulation <chr> "No tow/articulation", "No tow/artic…
## $ vehicle_manoeuvre <chr> "Going ahead other", "Going ahead ot…
## $ vehicle_location.restricted_lane <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ junction_location <chr> "Not at or within 20 metres of junct…
## $ skidding_and_overturning <chr> "None", "None", "None", "None", "Non…
## $ hit_object_in_carriageway <int> 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0,…
## $ vehicle_leaving_carriageway <int> 0, 0, 0, 0, 3, 0, 0, 1, 0, 0, 0, 0, …
## $ hit_object_off_carriageway <int> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ xfirst_point_of_impact <int> 1, 2, 1, 1, 1, 1, 1, 1, 1, 3, 3, 1, …
## $ was_vehicle_left_hand_drive. <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ journey_purpose_of_driver <chr> "Not known", "Not known", "Not known…
## $ sex_of_driver <chr> "Male", "Male", "Not known", "Male",…
## $ age_of_driver <int> 32, 48, -1, 40, 21, 27, 30, 43, 55, …
## $ age_band_of_driver <int> 6, 8, -1, 7, 5, 6, 6, 7, 8, 9, 7, 10…
## $ engine_capacity_.cc. <int> 1995, 1798, -1, 1797, -1, 1200, -1, …
## $ propulsion_code <chr> "Heavy oil", "Hybrid electric", NA, …
## $ age_of_vehicle <int> 5, 6, -1, 6, -1, 1, -1, 1, -1, 11, 3…
## $ driver_imd_decile <chr> "Less deprived 20-30%", "Most depriv…
## $ driver_home_area_type <int> 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ vehicle_imd_decile <int> 8, 1, -1, 3, 5, 7, 3, 4, 3, 10, 9, 3…
# format_casualties() (stats19) formats the casualty records; in the glimpse
# below column names are lower case and most coded fields appear as text labels.
C_for<-format_casualties(C)
glimpse(C_for)
## Observations: 160,597
## Variables: 16
## $ accident_index <fct> 2018010080971, 2018010080971, 2018…
## $ vehicle_reference <int> 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2…
## $ casualty_reference <int> 1, 2, 1, 1, 1, 1, 2, 1, 2, 3, 4, 1…
## $ casualty_class <chr> "Passenger", "Driver or rider", "P…
## $ sex_of_casualty <chr> "Female", "Male", "Male", "Male", …
## $ age_of_casualty <int> 50, 48, 29, 40, 27, 43, 58, 69, 56…
## $ age_band_of_casualty <chr> "46 - 55", "46 - 55", "26 - 35", "…
## $ casualty_severity <chr> "Slight", "Slight", "Slight", "Sli…
## $ pedestrian_location <chr> "Not a Pedestrian", "Not a Pedestr…
## $ pedestrian_movement <chr> "Not a Pedestrian", "Not a Pedestr…
## $ car_passenger <chr> "Rear seat passenger", "Not car pa…
## $ bus_or_coach_passenger <chr> "Not a bus or coach passenger", "N…
## $ pedestrian_road_maintenance_worker <chr> "No / Not applicable", "No / Not a…
## $ casualty_type <chr> "Car occupant", "Taxi/Private hire…
## $ casualty_home_area_type <chr> "Urban area", "Urban area", "Urban…
## $ casualty_imd_decile <chr> "Less deprived 20-30%", "Most depr…
####
# Daily number of accidents in each severity class, reshaped to long form
# (one row per date and severity class) for plotting.
# A_for holds one row per accident, so the values are counts of ACCIDENTS,
# not of casualties; the axis label now says so. The object and column names
# are left unchanged so that any later code referring to them keeps working.
# pivot_longer() replaces the superseded gather(); the same rows are produced,
# only their order differs (by date instead of by severity).
casualities <- A_for %>%
  st_set_geometry(NULL) %>%
  group_by(date) %>%
  summarise(
    Fatal = sum(accident_severity == "Fatal"),
    Slight = sum(accident_severity == "Slight"),
    Serious = sum(accident_severity == "Serious")
  ) %>%
  tidyr::pivot_longer(
    cols = -date,
    names_to = "accident_severity",
    values_to = "no_of_casualities"
  )
# Smoothed (loess) daily trend, one curve per severity class
cas_TS <- ggplot(casualities, aes(date, no_of_casualities)) +
  geom_smooth(aes(colour = accident_severity), method = "loess") +
  ylab("Accidents per day")
# Interactive version of the chart
ggplotly(cas_TS)
## `geom_smooth()` using formula 'y ~ x'
# Use sugrrants to draw the number of casualties against time of day on a
# calendar-based layout (calendar = "monthly").
#glimpse(A_for)
#glimpse(A_for)
#A_for %>%
# group_by(dttm=floor(datetime)) %>%
# summarize(noc=sum(number_of_casualties))
# NOTE(review): the original heading here said "with free scale", but no
# `scale` argument is passed to frame_calendar() below - confirm which is meant.
# NOTE(review): `time` is a factor of "HH:MM" strings and A_for still carries
# its geometry column at this point - confirm frame_calendar() treats both as
# intended.
# Steps: flag Saturday/Sunday records as "Weekend", let frame_calendar() add the
# calendar coordinates (.time, .number_of_casualties), draw one line per date
# coloured by the weekend flag, then let prettify() finish the plot.
ps <- A_for %>%
mutate(Weekend = if_else(day_of_week %in% c("Saturday", "Sunday"), "Weekend", "Weekday")) %>%
frame_calendar(x = time, y = number_of_casualties, date = date, calendar = "monthly") %>%
ggplot(aes(x = .time, y = .number_of_casualties, group = date, colour = Weekend)) +
geom_line() +
theme(legend.position = "bottom")
prettify(ps)
#browseVignettes("sugrrants")
# #Sum2 <- aggregate(A_for["no_of_cas"],
# list(hour=cut(as.POSIXct(A_for$datetime)-1, "hour")),
# sum)
#glimpse(A_for)
#Aggreagte by hour code
# aggregate(A_for["no_of_cas"],
# list(hour=cut(as.POSIXct(A_for$datetime)-1, "hour")),sum)
#Time series prediction
#Acc_part<-A_for %>% st_set_geometry(NULL) %>% select(accident_index, number_of_casualties, date, time, day_of_week)
#Acc_part$time_slot <-as.numeric(substr(Acc_part$time,0,2))
# Put every accident into its clock-hour bin: cut() on the datetime gives a
# factor of hourly intervals and droplevels() discards the hours in which no
# accident happened
A_for$datetime1 <- droplevels(cut(A_for$datetime, breaks = "hour"))
# Number of distinct accidents per hourly bin, with weekday and date carried
# along as grouping keys. Accidents lacking a datetime are kept under an
# explicit "missing" factor level instead of NA.
newdf <- A_for %>%
  st_set_geometry(NULL) %>%
  select(
    accident_index,
    datetime1,
    number_of_casualties,
    day_of_week,
    date
  ) %>%
  mutate(
    Weekend = if_else(
      day_of_week %in% c("Saturday", "Sunday"),
      "Weekend",
      "Weekday"
    )
  ) %>%
  group_by(datetiem2 = fct_explicit_na(datetime1), day_of_week, date) %>%
  summarize(total_accidents = n_distinct(accident_index))
#newdf$date<-as.Date(as.character(newdf$datetiem2, format = "%m/%d/%Y"))
#newdf[max(newdf$total_accidents),]
# sugrrants calendar plot of the hourly accident totals held in newdf
# (calendar = "monthly", scale = "free", 4 columns).
# `ps` overwrites the calendar plot object created earlier in the script.
# The Weekend flag is recomputed here because newdf does not contain it.
# NOTE(review): `datetiem2` (spelled this way in newdf) is a factor of hourly
# bins rather than a numeric or time value - confirm frame_calendar() places
# it on the x axis as intended.
# NOTE(review): the data are grouped by day_of_week and date when passed to
# frame_calendar(); confirm the grouping is wanted at that step.
ps <- newdf %>%
group_by(day_of_week, date)%>%
mutate(Weekend = if_else(day_of_week %in% c("Saturday", "Sunday"), "Weekend", "Weekday")) %>%
frame_calendar(x = datetiem2, y = total_accidents, date = date, calendar = "monthly", scale = "free", ncol=4) %>%
ggplot(aes(x = .datetiem2, y = .total_accidents, group = date, colour = Weekend)) +
geom_line() +
theme(legend.position = "bottom")
prettify(ps)
# Number of missing values in every column, largest count first
sort(
  vapply(A_for, function(column) sum(is.na(column)), integer(1)),
  decreasing = TRUE
)
## datetime
## 13
## datetime1
## 13
## urban_or_rural_area
## 1
## accident_index
## 0
## longitude
## 0
## latitude
## 0
## police_force
## 0
## accident_severity
## 0
## number_of_vehicles
## 0
## number_of_casualties
## 0
## date
## 0
## day_of_week
## 0
## time
## 0
## local_authority_.district.
## 0
## local_authority_.highway.
## 0
## xfirst_road_class
## 0
## xfirst_road_number
## 0
## road_type
## 0
## speed_limit
## 0
## junction_detail
## 0
## junction_control
## 0
## xsecond_road_class
## 0
## xsecond_road_number
## 0
## pedestrian_crossing.human_control
## 0
## pedestrian_crossing.physical_facilities
## 0
## light_conditions
## 0
## weather_conditions
## 0
## road_surface_conditions
## 0
## special_conditions_at_site
## 0
## carriageway_hazards
## 0
## did_police_officer_attend_scene_of_accident
## 0
## lsoa_of_accident_location
## 0
## geometry
## 0
#EDA
# 1 - Number of accidents per weekday (the earlier bar chart was based on the
# number of casualties; this one counts accidents).
# The geometry column is dropped before summarising, as in the other summaries
# of this script: only counts are needed, and summarising an sf object would
# also combine the point geometries of every group, which is slow on this many
# records. Weekdays are turned into a factor so the bars appear in calendar
# order instead of alphabetically.
noa <- A_for %>%
  st_set_geometry(NULL) %>%
  mutate(
    day_of_week = factor(
      day_of_week,
      levels = c("Sunday", "Monday", "Tuesday", "Wednesday",
                 "Thursday", "Friday", "Saturday")
    )
  ) %>%
  group_by(day_of_week) %>%
  summarize(total_accidents = n_distinct(accident_index)) %>%
  ggplot(aes(x = day_of_week, y = total_accidents)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  # print each count inside the top of its bar
  geom_text(aes(label = total_accidents), vjust = 1.6, color = "white", size = 3.5)
noa
# Accidents by hour of day
# Hour of day (0-23) taken from the first two characters of the "HH:MM" time;
# records without a time become NA.
A_for$time_slot <- as.numeric(substr(A_for$time, 1, 2))

# Bar chart of the number of distinct accidents in each hour of the day.
#   accidents  : sf data frame containing `time_slot` and `accident_index`
#   plot_title : text used as the chart title
# Returns the ggplot object (it is drawn when the call is made at top level).
# The four charts below used to be four copies of the same pipeline; they now
# share this helper. Geometry is dropped before summarising, as elsewhere in
# this script: only counts are needed, and summarising an sf object would also
# combine the point geometries of every group.
# The NA time_slot group is kept, so ggplot still reports one removed row.
plot_accidents_by_hour <- function(accidents, plot_title) {
  accidents %>%
    st_set_geometry(NULL) %>%
    group_by(time_slot) %>%
    summarize(total_accidents = n_distinct(accident_index)) %>%
    ggplot(aes(x = time_slot, y = total_accidents)) +
    geom_bar(stat = "identity", fill = "steelblue") +
    geom_text(aes(label = total_accidents), vjust = 1.6, color = "black", size = 3) +
    scale_x_continuous(breaks = round(seq(0, 24, by = 1), 0)) +
    ggtitle(plot_title) +
    xlab("Hours") + ylab("Total Accidents") +
    theme(plot.title = element_text(hjust = 0.5), panel.background = element_blank())
}

# All accidents by hour
plot_accidents_by_hour(A_for, "Total Accidents by Hours ")
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (geom_text).
paste("From hours distribution of accidents it is clear that accidents tend to occur on the business hours when people commute to work.")
## [1] "From hours distribution of accidents it is clear that accidents tend to occur on the business hours when people commute to work."
# Slight accidents by hour
A_for %>%
  filter(accident_severity == "Slight") %>%
  plot_accidents_by_hour("Total Slight Accidents by Hours ")
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (geom_text).
# Serious accidents by hour
A_for %>%
  filter(accident_severity == "Serious") %>%
  plot_accidents_by_hour("Total Serious Accidents by Hours ")
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (geom_text).
# Fatal accidents by hour
A_for %>%
  filter(accident_severity == "Fatal") %>%
  plot_accidents_by_hour("Total Fatal Accidents by Hours")
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (geom_text).
# Contingency table of hour of day (rows) against accident severity (columns),
# printed as row proportions, i.e. the severity mix within each hour
acc_time <- table(A_for$time_slot, A_for$accident_severity)
prop.table(acc_time, margin = 1)
##
## Fatal Serious Slight
## 0 0.033787466 0.227792916 0.738419619
## 1 0.031620553 0.231620553 0.736758893
## 2 0.039886040 0.248812915 0.711301045
## 3 0.041966427 0.242206235 0.715827338
## 4 0.038847118 0.225563910 0.735588972
## 5 0.034223706 0.195325543 0.770450751
## 6 0.019370460 0.209443099 0.771186441
## 7 0.011533002 0.190028389 0.798438609
## 8 0.007965327 0.145718637 0.846316036
## 9 0.010554988 0.169390535 0.820054477
## 10 0.012026239 0.174198251 0.813775510
## 11 0.013422819 0.181535440 0.805041742
## 12 0.009984531 0.184081001 0.805934468
## 13 0.014996496 0.177435179 0.807568325
## 14 0.012845981 0.186862667 0.800291352
## 15 0.009371380 0.190270612 0.800358008
## 16 0.010604989 0.190283810 0.799111201
## 17 0.009425071 0.184165881 0.806409048
## 18 0.008557595 0.187366288 0.804076118
## 19 0.014976069 0.207194689 0.777829242
## 20 0.017891374 0.199361022 0.782747604
## 21 0.020413718 0.204953729 0.774632553
## 22 0.018995098 0.221507353 0.759497549
## 23 0.024006002 0.231057764 0.744936234
("write interpretation of the prop_table")
## [1] "write interpretation of the prop_table"
# Pearson chi-squared test of independence between hour of day and accident
# severity, run on the acc_time contingency table. The hypotheses are printed
# first, then the test result, then the conclusion drawn from the p-value.
paste("NULL hypthesis : Accident severity is independent of time.
HA : accident severity is dependent on time")
## [1] "NULL hypthesis : Accident severity is independent of time.\n HA : accident severity is dependent on time"
chisq.test(x = acc_time)
##
## Pearson's Chi-squared test
##
## data: acc_time
## X-squared = 740.53, df = 46, p-value < 2.2e-16
paste("The p-value is significantly less than 0.05, we reject with the Null hypothesis that the accident severity is independent of the hours.")
## [1] "The p-value is significantly less than 0.05, we reject with the Null hypothesis that the accident severity is independent of the hours."
# We can do the same for the other variables.
# Flag weekend-night accidents ("Yes"/"No"). The weekend night covers:
#   - Friday 21:00-23:59,
#   - Saturday 00:00-05:59 and 21:00-23:59,
#   - Sunday 00:00-05:59.
# Working assumption behind the flag: people drink and drive on weekends.
A_for$WE_night <- local({
  day <- A_for$day_of_week
  hour <- A_for$time_slot

  friday_evening <- day == "Friday" & hour %in% 21:23
  saturday_night <- day == "Saturday" & hour %in% c(21:23, 0:5)
  sunday_early <- day == "Sunday" & hour %in% 0:5

  ifelse(friday_evening | saturday_night | sunday_early, "Yes", "No")
})
# Severity mix of weekend-night accidents compared with all other accidents.
# After summarise() the data is still grouped by WE_night (dplyr drops only the
# last grouping level), so freq is the percentage within each WE_night group.
A_for %>%
  group_by(WE_night, accident_severity) %>%
  summarise(total_accidents = n_distinct(accident_index)) %>%
  mutate(freq = round(100 * total_accidents / sum(total_accidents), digits = 2)) %>%
  ggplot(aes(x = accident_severity, y = freq, fill = WE_night)) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = freq),
    position = position_dodge(width = 0.9),
    vjust = 2, color = "black", size = 3
  ) +
  labs(
    title = "Accident Severity on WE-Night vs non WE-Night",
    x = "Accident Severity",
    y = "Accident Proportion in %"
  )
# Interpretation of the plot above.
paste("There is a clear differnece between weekend nights and non-weekend nights accidents. In case of weekend nights
fatal and serious accidents occur more but in case of slight more accidents took place in non-weekend nights. we discovered that there’s more accidents during the rush hour time (4pm-6pm)")
## [1] "There is a clear differnece between weekend nights and non-weekend nights accidents. In case of weekend nights \n fatal and serious accidents occur more but in case of slight more accidents took place in non-weekend nights. we discovered that there’s more accidents during the rush hour time (4pm-6pm)"
# Accident severity by weather conditions.
# freq is the percentage of each severity level within a weather condition:
# summarise() leaves the data grouped by weather_conditions, so the sum() in
# mutate() is taken per weather condition.
A_for %>%
  group_by(weather_conditions, accident_severity) %>%
  summarise(total_accidents = n_distinct(accident_index)) %>%
  mutate(freq = round(100 * total_accidents / sum(total_accidents), digits = 2)) %>%
  ggplot(aes(x = accident_severity, y = freq, fill = weather_conditions)) +
  geom_col(position = "dodge") +
  labs(
    title = "Accident Severity Proportion by Weather conditions",
    x = "Accident Severity",
    y = "Accident Percentage"
  )
# Interpretation of the plot above.
paste("slight severity accident are not dependent on weather conditions whereas there are more chances of fatal and slight accidents in foggy or misty weather when the visibility is not clear")
## [1] "slight severity accident are not dependent on weather conditions whereas there are more chances of fatal and slight accidents in foggy or misty weather when the visibility is not clear"
# Accident severity by light conditions.
# freq is the percentage of each severity level within a light condition
# (the data is still grouped by light_conditions when mutate() runs).
A_for %>%
  group_by(light_conditions, accident_severity) %>%
  summarise(total_accidents = n_distinct(accident_index)) %>%
  mutate(freq = round(100 * total_accidents / sum(total_accidents), digits = 2)) %>%
  ggplot(aes(x = accident_severity, y = freq, fill = light_conditions)) +
  geom_col(position = "dodge") +
  labs(
    title = "Accident Severity Proportion by Light conditions",
    x = "Accident Severity",
    y = "Accident Percentage"
  )
# Accident severity by area type (urban vs rural).
# Records whose area is "Unallocated" are excluded before counting; freq is the
# percentage of each severity level within an area type.
A_for %>%
  filter(urban_or_rural_area != "Unallocated") %>%
  group_by(urban_or_rural_area, accident_severity) %>%
  summarise(total_accidents = n_distinct(accident_index)) %>%
  mutate(freq = round(100 * total_accidents / sum(total_accidents), digits = 2)) %>%
  ggplot(aes(x = accident_severity, y = freq, fill = urban_or_rural_area)) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = freq),
    position = position_dodge(width = 0.9),
    vjust = 2, color = "black", size = 3
  ) +
  labs(
    title = "Accident Severity Percentage by Area Type",
    x = "Accident Severity",
    y = "Accident Severity %"
  )
# Print a compact overview of A_for: one line per column with its type and
# first few values (includes the WE_night flag added earlier).
dplyr::glimpse(A_for)
## Observations: 122,580
## Variables: 35
## $ accident_index <fct> 2018010080971, 2018010080…
## $ longitude <dbl> -0.139737, 0.046471, -0.1…
## $ latitude <dbl> 51.52459, 51.53965, 51.52…
## $ police_force <chr> "Metropolitan Police", "M…
## $ accident_severity <chr> "Slight", "Slight", "Slig…
## $ number_of_vehicles <int> 2, 1, 2, 2, 2, 2, 2, 3, 2…
## $ number_of_casualties <int> 2, 1, 1, 1, 2, 4, 1, 1, 1…
## $ date <date> 2018-01-01, 2018-01-01, …
## $ day_of_week <chr> "Monday", "Monday", "Mond…
## $ time <fct> 01:30, 00:50, 00:45, 03:0…
## $ local_authority_.district. <int> 2, 17, 3, 17, 6, 30, 12, …
## $ local_authority_.highway. <fct> E09000007, E09000025, E09…
## $ xfirst_road_class <int> 3, 4, 3, 4, 3, 3, 5, 3, 6…
## $ xfirst_road_number <int> 501, 165, 1, 167, 207, 10…
## $ road_type <chr> "Dual carriageway", "Sing…
## $ speed_limit <int> 30, 30, 20, 30, 30, 30, 3…
## $ junction_detail <chr> "Not at junction or withi…
## $ junction_control <chr> "Data missing or out of r…
## $ xsecond_road_class <int> -1, 6, 5, 3, -1, -1, 5, 3…
## $ xsecond_road_number <int> 0, 0, 0, 124, 0, 0, 0, 20…
## $ pedestrian_crossing.human_control <int> 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ pedestrian_crossing.physical_facilities <int> 0, 0, 5, 5, 0, 0, 0, 0, 8…
## $ light_conditions <chr> "Darkness - lights lit", …
## $ weather_conditions <chr> "Fine no high winds", "Fi…
## $ road_surface_conditions <chr> "Dry", "Dry", "Dry", "Wet…
## $ special_conditions_at_site <chr> "None", "None", "None", "…
## $ carriageway_hazards <chr> "None", "None", "None", "…
## $ urban_or_rural_area <chr> "Urban", "Urban", "Urban"…
## $ did_police_officer_attend_scene_of_accident <int> 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ lsoa_of_accident_location <fct> E01000854, E01003531, E01…
## $ geometry <POINT [m]> POINT (529150 18227…
## $ datetime <dttm> 2018-01-01 01:30:00, 201…
## $ datetime1 <fct> 2018-01-01 01:00:00, 2018…
## $ time_slot <dbl> 1, 0, 0, 3, 2, 1, 5, 4, 0…
## $ WE_night <chr> "No", "No", "No", "No", "…
# Accident severity by junction type (junction_detail).
# Records with junction_detail "Data missing or out of range" are excluded
# before counting; freq is the percentage of each severity level within a
# junction type.
A_for %>%
  filter(junction_detail != "Data missing or out of range") %>%
  group_by(junction_detail, accident_severity) %>%
  summarise(total_accidents = n_distinct(accident_index)) %>%
  mutate(freq = round(100 * total_accidents / sum(total_accidents), digits = 2)) %>%
  ggplot(aes(x = accident_severity, y = freq, fill = junction_detail)) +
  geom_col(position = "dodge") +
  labs(
    title = "Accident Severity % by Junction",
    x = "Accident Severity",
    y = "Accident Severity %"
  )
# Interpretation of the plot above.
paste(" Accident happening on a roundabout is much more likely to be a slight accident and not likely at all to be a fatal accident. probability of an accident to be fatal is higher on road that ar enot a junction or within 20 metres of a junction. ")
## [1] " Accident happening on a roundabout is much more likely to be a slight accident and not likely at all to be a fatal accident. probability of an accident to be fatal is higher on road that ar enot a junction or within 20 metres of a junction. "
#model fitting
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.